package SparkSmallPaper
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.log4j.{Level, Logger}
//x(0)=buyer id, x(1)=item id, x(2)=item category, x(3)=seller id, x(4)=brand id, x(5)=transaction month, x(6)=transaction day, x(7)=buyer action, x(8)=buyer age range, x(9)=buyer gender, x(10)=delivery address
//This job compares transaction counts between male and female buyers across age ranges.
object demo03 {
    def main(args: Array[String]): Unit = {
        // Fixed: original set the app name from demo01 (copy-paste error); use this object's own class.
        val conf = new SparkConf().setMaster("local[2]").setAppName(demo03.getClass.getSimpleName)
        Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
        val sc = new SparkContext(conf)
        transformationOps5(sc)
        sc.stop()
    }

  /**
   * Counts transactions per (age range, gender) from the user_log CSV and prints
   * one line per group. Only rows whose gender field is "0" or "1" are kept.
   *
   * @param sc the active SparkContext used to read and process the file
   */
  def transformationOps5(sc: SparkContext): Unit = {
        val lines = sc.textFile("file:///C:/Users/asus/Desktop/hadoop_experiment/data/user_log.csv")
        // Drop the CSV header row: zipWithIndex tags each parsed row with its position,
        // rows with index >= 1 are kept, then the indices are discarded via .keys.
        val wordsRDD = lines.map(line => line.split(",")).zipWithIndex().filter(_._2 >= 1).keys

        // Key each row by (ageRange, gender) with a count of 1; keep only gender "0" or "1"
        // NOTE(review): 0/1 presumably encode female/male per the dataset's dictionary — confirm.
        val ageGender = wordsRDD
          .map(w => ((w(8), w(9)), 1))
          .filter { case ((_, gender), _) => gender.equals("0") || gender.equals("1") }

        // Fixed: reduceByKey aggregates map-side before the shuffle; the original
        // groupByKey shipped every single record across the network only to take .size.
        // The resulting (age, gender, count) tuples are identical.
        val countsRDD = ageGender
          .reduceByKey(_ + _)
          .map { case ((age, gender), count) => (age, gender, count) }

        // foreach runs on the executors; output goes to the driver console only in local mode.
        countsRDD.foreach(x => println("年龄段：" + x._1 + "  性别:" + x._2 + "  数量：" + x._3))
    }
}